miχpods - TAO#

  • eulerian 0-40m control volume

  • focus on Jq

  • use model chl / double exponentials

%load_ext watermark

import os

import cf_xarray
import dask
import dcpy
import distributed
import flox.xarray
import hvplot.xarray
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import dcpy
import glob


import pump
from pump import mixpods

dask.config.set({"array.slicing.split_large_chunks": False})
mpl.rcParams["figure.dpi"] = 140
xr.set_options(keep_attrs=True)

gcmdir = "/glade/campaign/cgd/oce/people/bachman/TPOS_1_20_20_year/OUTPUT/"  # MITgcm output directory
stationdirname = gcmdir

%watermark -iv
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/dask_jobqueue/core.py:20: FutureWarning: tmpfile is deprecated and will be removed in a future release. Please use dask.utils.tmpfile instead.
  from distributed.utils import tmpfile
matplotlib : 3.6.3
dask       : 2023.1.0
distributed: 2023.1.0
flox       : 0.6.7
hvplot     : 0.8.2
json       : 2.0.9
numpy      : 1.23.5
xarray     : 2023.1.0
pump       : 0.1
cf_xarray  : 0.7.7
dcpy       : 0.1.dev385+g121534c
sys        : 3.10.8 | packaged by conda-forge | (main, Nov 22 2022, 08:26:04) [GCC 10.4.0]
import dask_jobqueue

if "client" in locals():
    client.close()
    del client
if "cluster" in locals():
    cluster.close()

cluster = dask_jobqueue.PBSCluster(
    cores=4,  # The number of cores you want
    memory="23GB",  # Amount of memory
    processes=1,  # How many processes
    queue="casper",  # The type of queue to utilize (/glade/u/apps/dav/opt/usr/bin/execcasper)
    local_directory="/local_scratch/pbs.$PBS_JOBID",
    log_directory="/glade/scratch/dcherian/dask/",  # Use your local directory
    resource_spec="select=1:ncpus=4:mem=23GB",  # Specify resources
    project="ncgd0011",  # Input your project ID here
    walltime="02:00:00",  # Amount of wall time
    interface="ib0",  # Interface to use
)
cluster.adapt(minimum_jobs=2, maximum_jobs=8)
# cluster.scale(4)
client = distributed.Client(cluster)
client

Client

Client-3953f65f-a36d-11ed-91ee-3cecef1acbfa

Connection method: Cluster object Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/dcherian/proxy/8787/status

Cluster Info

tao_gridded = mixpods.load_tao()
tao_gridded["Ih"] = 0.45 * tao_gridded.swnet * np.exp(-0.04 * np.abs(tao_gridded.mldT))
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/dataset.py:256: UserWarning: The specified Dask chunks separate the stored chunks along dimension "depth" starting at index 42. This could degrade performance. Instead, consider rechunking after loading.
  warnings.warn(
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/dataset.py:256: UserWarning: The specified Dask chunks separate the stored chunks along dimension "time" starting at index 199726. This could degrade performance. Instead, consider rechunking after loading.
  warnings.warn(
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/dataset.py:256: UserWarning: The specified Dask chunks separate the stored chunks along dimension "longitude" starting at index 2. This could degrade performance. Instead, consider rechunking after loading.
  warnings.warn(

Moum et al (2013) Climatologies#

  • The published .nc file seems “over cleaned” with mask ε < 3e-6 or so

tropflux = xr.open_mfdataset(
    sorted(glob.glob("/glade/work/dcherian/datasets/tropflux/**/*.nc")),
    engine="netcdf4",
    parallel=True,
)

tropflux_0140 = tropflux.interp(latitude=0, longitude=360 - 140).load()
tropflux_clim = tropflux_0140.groupby("time.month").mean()

Monthly climatology, Vertical mean#

Didn’t work with published nc file#

clim = xr.Dataset()
clim["netflux"] = (
    tropflux_0140.netflux.sel(time=slice("2005", "2011")).groupby("time.month").mean()
)
clim["Ih"] = tao_gridded.Ih.sel(time=slice("2005", "2011")).groupby("time.month").mean()
clim["Jq"] = (
    tao_gridded.Jq.sel(time=slice("2005", "2011"), depthchi=slice(-60, -20))
    .groupby("time.month")
    .mean(["depthchi", "time"])
)

(-1 * clim.Jq).plot(ylim=(0, 200))
(clim.netflux - clim.Ih).plot(ylim=(0, 200))
[<matplotlib.lines.Line2D>]
_images/mixpods-tao_7_1.png

Try with .mat file#

cchdo = dcpy.oceans.read_cchdo_chipod_file(
    "/glade/u/home/dcherian/datasets/microstructure/osu/chipod/tao/chipods_0_140W.nc"
)
cchdo
<xarray.Dataset>
Dimensions:     (depth: 7, time: 107588)
Coordinates:
    timeSeries  (depth) float64 ...
  * time        (time) datetime64[ns] 2005-09-23T04:30:00 ... 2017-12-31T23:2...
    lat         (depth) float64 dask.array<chunksize=(7,), meta=np.ndarray>
    lon         (depth) float64 dask.array<chunksize=(7,), meta=np.ndarray>
  * depth       (depth) float64 29.0 39.0 49.0 59.0 69.0 89.0 119.0
Data variables:
    T           (depth, time) float64 dask.array<chunksize=(7, 10000), meta=np.ndarray>
    dTdz        (depth, time) float64 dask.array<chunksize=(7, 10000), meta=np.ndarray>
    N2          (depth, time) float64 dask.array<chunksize=(7, 10000), meta=np.ndarray>
    KT          (depth, time) float64 dask.array<chunksize=(7, 10000), meta=np.ndarray>
    chi         (depth, time) float64 dask.array<chunksize=(7, 10000), meta=np.ndarray>
    eps         (depth, time) float64 dask.array<chunksize=(7, 10000), meta=np.ndarray>
    Jq          (depth, time) float64 dask.array<chunksize=(7, 10000), meta=np.ndarray>
Attributes: (12/68)
    ncei_template_version:           NCEI_NetCDF_TimeSeries_Orthogonal_Templa...
    featureType:                     timeSeries
    title:                           Turbulence quantities measured by chipod...
    summary:                         Turbulence data in upper w m / all good ...
    keywords:                        sea_water_temperature, square_of_brunt_v...
    Conventions:                     CF-1.6, ACDD-1.3
    ...                              ...
    reference8:                      Moum, J.N., Ocean speed and turbulence m...
    reference9:                      Warner, S.J., J. Becherer, K. Pujiana, E...
    reference10:                     Moum, J.N., K. Pujiana, R-C. Lien and W....
    reference11:                     Becherer, J. and J.N. Moum, An efficient...
    reference12:                     Moulin, A.J., J.N. Moum and E.L. Shroyer...
    reference13:                     Thakur, R., E.L. Shroyer, R. Govindaraja...
def read_chipod_mat_file(fname):
    import h5py
    import pandas as pd

    file = h5py.File(
        "/glade/u/home/dcherian/work/pump/datasets/microstructure/chipods_0_140W_hourly.mat",
        mode="r",
    )

    chipod = xr.Dataset()

    chipod["depth"] = ("depth", file["chr"]["depth"][:].squeeze())
    chipod["time"] = ("time", file["chr"]["time"][:].squeeze())
    chipod["time"] = pd.to_datetime(chipod.time.data - 719529, unit="D")
    for var in ["Jq", "Kt", "N2", "T", "chi", "dTdz", "eps"]:
        chipod[var] = xr.DataArray(file["chr"][var][:, :], dims=("time", "depth"))
    return chipod


mat = read_chipod_mat_file(
    os.path.expanduser("~/work/pump/datasets/microstructure/chipods_0_140W_hourly.mat")
)
mat.Jq.where(np.abs(mat.Jq) < 10000).groupby("time.year").mean().sel(
    year=slice(2005, 2011)
).hvplot.line(
    by="year", y="depth", flip_yaxis=True
)  # .opts(legend_cols=1)

Compare#

h1 = (
    cchdo.Jq.reset_coords(drop=True)
    .resample(time="M")
    .mean()
    .hvplot.quadmesh(
        cmap="greys",
        flip_yaxis=True,
        frame_width=1200,
        frame_height=200,
        clim=(40, -100),
    )
)
h2 = (
    mat.Jq.resample(time="M")
    .mean()
    .hvplot.quadmesh(
        cmap="greys",
        flip_yaxis=True,
        frame_width=1200,
        frame_height=200,
        clim=(40, -100),
    )
)
(h2.opts(title=".mat") + h1.opts(title="CCHDO")).cols(1)
h = (
    (
        np.abs(cchdo.Jq.resample(time="D").mean())
        .reset_coords(drop=True)
        .hvplot.line(by="depth", x="time")
        .opts(frame_width=1200)
        + np.abs(mat.Jq.resample(time="D").mean())
        .reset_coords(drop=True)
        .hvplot.line(by="depth", x="time")
        .opts(frame_width=1200)
    )
    .opts(shared_axes=True)
    .cols(1)
)
h
def plot_monthly_clim(ds, **kwargs):
    return np.abs(
        ds.Jq.sel(time=slice("2005", "2011-02"))
        .sel(depth=slice(20, 60))
        .mean("depth")
        .groupby("time.month")
        .mean()
    ).hvplot.line(**kwargs)


(
    plot_monthly_clim(mat, label=".mat")
    * plot_monthly_clim(mat.where(np.abs(mat.Jq) < 20000), label=".mat, Jq < 20000")
    * plot_monthly_clim(mat.where(np.abs(mat.Jq) < 10000), label=".mat, Jq < 10000")
    * plot_monthly_clim(mat.where(np.abs(mat.Jq) < 3000), label=".mat, Jq < 3000")
    * plot_monthly_clim(mat.where(mat.eps < 5e-4), label=".mat, ε < 5e-4")
    * plot_monthly_clim(mat.where(mat.eps < 1e-5), label=".mat, ε < 1e-5")
    * plot_monthly_clim(mat.where(mat.eps < 3e-6), label=".mat, ε < 3e-6")
    * plot_monthly_clim(cchdo, label="CCHDO", color="k")
    * (clim.netflux - clim.Ih)
    .rename("a")
    .hvplot.line(label="Jq0 - Ih", color="k", line_width=4)
).opts(show_grid=True, legend_position="right", frame_width=600, ylim=(0, 160))

Seasonal climatology, Vertical profile#

def plot_seasonal_clim(ds, **kwargs):
    return (
        np.abs(
            ds.Jq.sel(time=slice("2005", "2011-02"))
            .sel(depth=slice(20, 70))
            # .mean("depth")
            .groupby("time.season")
            .mean()
            .sel(season=["DJF", "MAM", "JJA", "SON"])
        ).hvplot.line(flip_yaxis=True, y="depth", col="season", **kwargs)
        # .opts()
    )


h = (
    plot_seasonal_clim(mat, label=".mat")
    * plot_seasonal_clim(mat.where(mat.eps < 1e-4), label=".mat, ε < 1e-4")
    * plot_seasonal_clim(mat.where(mat.eps < 1e-5), label=".mat, ε < 1e-5")
    * plot_seasonal_clim(mat.where(mat.eps < 3e-6), label=".mat, ε < 3e-6")
    * plot_seasonal_clim(cchdo, label="CCHDO", color="k")
)
h.opts(plot_size=(270, 350), shared_xaxis=True, shared_yaxis=True, show_legend=True)
from cycler import cycler

with plt.rc_context({"axes.prop_cycle": cycler(color=["k", "r", "b", "g"])}):
    (
        (-1 * cchdo.Jq.where(cchdo.Jq > -1e3).squeeze())
        .sel(time=slice("2005", "2012-02-01"))
        .groupby("time.season")
        .mean()
        .sel(season=["DJF", "MAM", "JJA", "SON"])
        .cf.plot(y="depth", hue="season")
    )
_images/mixpods-tao_19_0.png

Labeling ENSO phase transitions#

pump.obs.process_oni().sel(time=slice("2005-Oct", "2016-Feb")).reset_coords(
    drop=True
).plot.line(aspect=3, size=5, color="k")
pump.obs.make_enso_transition_mask().sel(
    time=slice("2005-Oct", "2016-Feb")
).reset_coords(drop=True).plot.line(ax=plt.gca().twinx(), marker="x")
plt.grid(True, which="both", axis="both")
_images/mixpods-tao_21_0.png

N2 vs N2T#

Lot more data with N2T

tao_gridded.N2.cf.plot()
<matplotlib.collections.QuadMesh>
_images/mixpods-tao_23_1.png
tao_gridded.N2T.cf.plot()
<matplotlib.collections.QuadMesh>
_images/mixpods-tao_24_1.png

PDFs change#

fg = tao_gridded.n2s2pdf.sel(
    enso_transition_phase=[
        "La-Nina cool",
        "La-Nina warm",
        "El-Nino warm",
        "El-Nino cool",
    ]
).plot(vmax=1, col="enso_transition_phase")
fg.set_titles("{value}")
fg.map(dcpy.plots.line45)
fg.axes[0, 0].set_xlim((-4.5, -2.5))
fg.axes[0, 0].set_ylim((-4.5, -2.5))
(-4.5, -2.5)
_images/mixpods-tao_26_1.png
(
    tao_gridded.n2s2pdf.sel(
        enso_transition_phase=[
            "La-Nina cool",
            "La-Nina warm",
            "El-Nino warm",
            "El-Nino cool",
        ]
    )
    .sum("N2_bins")
    .plot(hue="enso_transition_phase")
)
plt.figure()
(
    tao_gridded.n2s2pdf.sel(
        enso_transition_phase=[
            "La-Nina cool",
            "La-Nina warm",
            "El-Nino warm",
            "El-Nino cool",
        ]
    )
    .sum("S2_bins")
    .plot(hue="enso_transition_phase")
)
[<matplotlib.lines.Line2D>,
 <matplotlib.lines.Line2D>,
 <matplotlib.lines.Line2D>,
 <matplotlib.lines.Line2D>]
_images/mixpods-tao_27_1.png _images/mixpods-tao_27_2.png
fg = tao_gridded.n2s2Tpdf.sel(
    enso_transition_phase=[
        "La-Nina cool",
        "La-Nina warm",
        "El-Nino warm",
        "El-Nino cool",
    ]
).plot(vmax=1, col="enso_transition_phase")
fg.set_titles("{value}")
fg.map(dcpy.plots.line45)
fg.axes[0, 0].set_xlim((-4.5, -2.5))
fg.axes[0, 0].set_ylim((-4.5, -2.5))
(-4.5, -2.5)
_images/mixpods-tao_28_1.png
(
    tao_gridded.n2s2Tpdf.sel(
        enso_transition_phase=[
            "La-Nina cool",
            "La-Nina warm",
            "El-Nino warm",
            "El-Nino cool",
        ]
    )
    .sum("N2_bins")
    .plot(hue="enso_transition_phase")
)
plt.figure()
(
    tao_gridded.n2s2Tpdf.sel(
        enso_transition_phase=[
            "La-Nina cool",
            "La-Nina warm",
            "El-Nino warm",
            "El-Nino cool",
        ]
    )
    .sum("S2_bins")
    .plot(hue="enso_transition_phase")
)
[<matplotlib.lines.Line2D>,
 <matplotlib.lines.Line2D>,
 <matplotlib.lines.Line2D>,
 <matplotlib.lines.Line2D>]
_images/mixpods-tao_29_1.png _images/mixpods-tao_29_2.png